# -*- coding: utf-8 -*-
import scrapy

from yjd_douban.items import YjdDoubanItem
class DoubanSpider(scrapy.Spider):
    """Spider that walks the paginated Douban ``top250`` movie listing.

    Crawling starts at ``start=0`` and advances ``offset`` by 25 after each
    page until the page starting at 225 has been requested. Every movie
    entry found on a page is emitted as a separate ``YjdDoubanItem``.
    """

    name = 'douban'
    allowed_domains = ['movie.douban.com']
    # Value of the ``start`` query parameter for the page being crawled.
    offset = 0
    url = "https://movie.douban.com/top250?start="
    start_urls = [url + str(offset)]

    def parse(self, response):
        """Extract the movie entries of one listing page and queue the next.

        Args:
            response: The downloaded listing page.

        Yields:
            One ``YjdDoubanItem`` per movie entry with ``title``, ``bd`` and
            ``star`` set, plus ``quote`` when the entry has one; then a
            ``scrapy.Request`` for the following page while ``offset`` is
            still below 225.
        """
        # Each <div class="info"> holds the data of a single movie entry.
        movies = response.xpath("//div[@class='info']")
        for each in movies:
            title = each.xpath(
                ".//span[@class='title'][1]/text()").extract_first()
            bd = each.xpath(".//div[@class='bd']/p/text()").extract_first()
            star = each.xpath(
                ".//div[@class='star']/span[@class='rating_num']/text()"
            ).extract_first()

            # A malformed entry must not abort the rest of the page (and the
            # pagination request below), so skip it instead of raising.
            if title is None or bd is None or star is None:
                self.logger.warning(
                    "Skipping movie entry with missing fields on %s",
                    response.url)
                continue

            # Build a fresh item per movie: reusing one instance would make
            # every yielded item share state and leak a previous movie's
            # quote into entries that have none.
            item = YjdDoubanItem()
            item['title'] = title
            item['bd'] = bd
            item['star'] = star

            # The quote is optional; leave the field unset when it is absent.
            quote = each.xpath(".//p[@class='quote']/span/text()").extract_first()
            if quote is not None:
                item['quote'] = quote
            yield item

        # Pages advance in steps of 25; the last page requested starts at 225.
        if self.offset < 225:
            self.offset += 25
            # Re-enter this callback for the next page of the listing.
            yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
